





<!DOCTYPE html>
<html class="writer-html5" lang="zh-CN" >
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>内部函数和数学函数 &mdash; tvm 0.8.dev1982 文档</title>
  

  
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/gallery.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/tlcpack_theme.css" type="text/css" />

  
  
    <link rel="shortcut icon" href="../../_static/tvm-logo-square.png"/>
  

  
  
  
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
        <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
        <script src="../../_static/jquery.js"></script>
        <script src="../../_static/underscore.js"></script>
        <script src="../../_static/doctools.js"></script>
        <script src="../../_static/translations.js"></script>
    
    <script type="text/javascript" src="../../_static/js/theme.js"></script>

    
    <script type="text/javascript" src="../../_static/js/tlcpack_theme.js"></script>
    <link rel="index" title="索引" href="../../genindex.html" />
    <link rel="search" title="搜索" href="../../search.html" />
    <link rel="next" title="Scan and Recurrent Kernel" href="scan.html" />
    <link rel="prev" title="Reduction" href="reduction.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    
<header class="header">
    <div class="innercontainer">
      <div class="headerInner d-flex justify-content-between align-items-center">
          <div class="headerLogo">
               <a href="https://tvm.apache.org/"><img src=https://tvm.apache.org/assets/images/logo.svg alt="logo"></a>
          </div>

          <div id="headMenu" class="headerNav">
            <button type="button" id="closeHeadMenu" class="navCloseBtn"><img src="../../_static/img/close-icon.svg" alt="Close"></button>
             <ul class="nav">
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/community>Community</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/download>Download</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/vta>VTA</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/blog>Blog</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/docs>Docs</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvmconf.org>Conference</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://github.com/apache/tvm/>Github</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvmchinese.github.io/declaration_zh_CN.html>About-Translators</a>
                </li>
             </ul>
               <div class="responsivetlcdropdown">
                 <button type="button" class="btn-link">
                   ASF
                 </button>
                 <ul>
                     <li>
                       <a href=https://apache.org/>Apache Homepage</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/licenses/>License</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/security/>Security</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/thanks.html>Thanks</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/events/current-event>Events</a>
                     </li>
                     <li>
                       <a href=https://www.zhihu.com/column/c_1429578595417563136>Zhihu</a>
                     </li>
                 </ul>
               </div>
          </div>
            <div class="responsiveMenuIcon">
              <button type="button" id="menuBtn" class="btn-menu"><img src="../../_static/img/menu-icon.svg" alt="Menu Icon"></button>
            </div>

            <div class="tlcDropdown">
              <div class="dropdown">
                <button type="button" class="btn-link dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                  ASF
                </button>
                <div class="dropdown-menu dropdown-menu-right">
                  <ul>
                     <li>
                       <a href=https://apache.org/>Apache Homepage</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/licenses/>License</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/security/>Security</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/thanks.html>Thanks</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/events/current-event>Events</a>
                     </li>
                     <li>
                       <a href=https://www.zhihu.com/column/c_1429578595417563136>Zhihu</a>
                     </li>
                  </ul>
                </div>
              </div>
          </div>
       </div>
    </div>
 </header>
 
    <nav data-toggle="wy-nav-shift" class="wy-nav-side fixed">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="../../index.html">
          

          
            
            <img src="../../_static/tvm-logo-small.png" class="logo" alt="Logo"/>
          
          </a>

          
            
            
                <div class="version">
                  0.8.dev1982
                </div>
            
          

          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <p class="caption" role="heading"><span class="caption-text">如何开始</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../install/index.html">安装 TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contribute/index.html">贡献者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">用户引导</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../tutorial/index.html">User Tutorial</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">How To Guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../compile_models/index.html">编译深度学习模型</a></li>
<li class="toctree-l2"><a class="reference internal" href="../deploy/index.html">TVM 部署模型和集成</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_relay/index.html">Work With Relay</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="index.html">Work With Tensor Expression and Schedules</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="schedule_primitives.html">TVM中的调度原语</a></li>
<li class="toctree-l3"><a class="reference internal" href="reduction.html">Reduction</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">内部函数和数学函数</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#direct-declare-extern-math-call">直接声明外部数学调用</a></li>
<li class="toctree-l4"><a class="reference internal" href="#unified-intrinsic-call">Unified Intrinsic Call</a></li>
<li class="toctree-l4"><a class="reference internal" href="#intrinsic-lowering-rule">Intrinsic Lowering Rule</a></li>
<li class="toctree-l4"><a class="reference internal" href="#add-your-own-intrinsic">Add Your Own Intrinsic</a></li>
<li class="toctree-l4"><a class="reference internal" href="#summary">总结</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="scan.html">Scan and Recurrent Kernel</a></li>
<li class="toctree-l3"><a class="reference internal" href="extern_op.html">External Tensor Functions</a></li>
<li class="toctree-l3"><a class="reference internal" href="tensorize.html">Use Tensorize to Leverage Hardware Intrinsics</a></li>
<li class="toctree-l3"><a class="reference internal" href="tuple_inputs.html">Compute and Reduce with Tuple Inputs</a></li>
<li class="toctree-l3"><a class="reference internal" href="tedd.html">Use Tensor Expression Debug Display (TEDD) for Visualization</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../optimize_operators/index.html">优化张量算子</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tune_with_autotvm/index.html">Auto-Tune with Templates and AutoTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tune_with_autoscheduler/index.html">Use AutoScheduler for Template-Free Scheduling</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_microtvm/index.html">Work With microTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extend_tvm/index.html">Extend TVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../profile/index.html">Profile Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../errors.html">Handle TVM Errors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq.html">常见提问</a></li>
</ul>
</li>
</ul>
<p class="caption" role="heading"><span class="caption-text">开发者引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../dev/tutorial/index.html">Developer Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dev/how_to/how_to.html">开发者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">架构指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../arch/index.html">Design and Architecture</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">主题引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../topic/microtvm/index.html">microTVM：裸机使用TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../topic/vta/index.html">VTA: Versatile Tensor Accelerator</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">参考指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../reference/langref/index.html">语言参考</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/python/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/links.html">Other APIs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../genindex.html">索引</a></li>
</ul>

            
          
        </div>
        
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
      
      <nav class="wy-nav-top" aria-label="top navigation" data-toggle="wy-nav-top">
        
            <div class="togglemenu">

            </div>
            <div class="nav-content">
              <!-- tvm -->
              Table of content
            </div>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        

          




















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="../../index.html">Docs</a> <span class="br-arrow">></span></li>
        
          <li><a href="../index.html">How To Guides</a> <span class="br-arrow">></span></li>
        
          <li><a href="index.html">Work With Tensor Expression and Schedules</a> <span class="br-arrow">></span></li>
        
      <li>内部函数和数学函数</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
            <a href="../../_sources/how_to/work_with_schedules/intrin_math.rst.txt" rel="nofollow"> <img src="../../_static//img/source.svg" alt="viewsource"/></a>
          
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">注解</p>
<p>Click <a class="reference internal" href="#sphx-glr-download-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">here</span></a> to download the full example code</p>
</div>
<div class="sphx-glr-example-title section" id="intrinsics-and-math-functions">
<span id="sphx-glr-how-to-work-with-schedules-intrin-math-py"></span><h1>内部函数和数学函数<a class="headerlink" href="#intrinsics-and-math-functions" title="永久链接至标题">¶</a></h1>
<p><strong>作者</strong>: <a class="reference external" href="https://tqchen.github.io">Tianqi Chen</a></p>
<p>尽管TVM支持基本的算术运算。但是在许多情况下，我们需要更复杂的内置函数。例如:code:<a href="#id1"><span class="problematic" id="id2">`</span></a>exp`来取函数的指数。</p>
<p>这些函数依赖于目标系统，可能具有不同目标平台的不同名称。在本教程中，我们将学习如何调用这些特定于目标的函数，以及如何通过tvm的内部API统一接口。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">__future__</span> <span class="k">import</span> <span class="n">absolute_import</span><span class="p">,</span> <span class="n">print_function</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>

<span class="kn">import</span> <span class="nn">tvm</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="k">import</span> <span class="n">te</span>
<span class="kn">from</span> <span class="nn">tvm.ir</span> <span class="k">import</span> <span class="n">register_op_attr</span><span class="p">,</span> <span class="n">register_intrin_lowering</span>
</pre></div>
</div>
<div class="section" id="direct-declare-extern-math-call">
<h2>直接声明外部数学调用<a class="headerlink" href="#direct-declare-extern-math-call" title="永久链接至标题">¶</a></h2>
<p>The most straight-forward way to call target specific function is via
extern function call construct in tvm.
In the following example, we use <a class="reference internal" href="../../reference/api/python/tir.html#tvm.tir.call_pure_extern" title="tvm.tir.call_pure_extern"><code class="xref any py py-func docutils literal notranslate"><span class="pre">tvm.tir.call_pure_extern</span></code></a> to call
<code class="code docutils literal notranslate"><span class="pre">__expf</span></code> function, which is only available under CUDA.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">n</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">var</span><span class="p">(</span><span class="s2">&quot;n&quot;</span><span class="p">)</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">placeholder</span><span class="p">((</span><span class="n">n</span><span class="p">,),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">)</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">compute</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_pure_extern</span><span class="p">(</span><span class="s2">&quot;float32&quot;</span><span class="p">,</span> <span class="s2">&quot;__expf&quot;</span><span class="p">,</span> <span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">]),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">)</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">create_schedule</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="p">)</span>
<span class="n">num_thread</span> <span class="o">=</span> <span class="mi">64</span>
<span class="n">bx</span><span class="p">,</span> <span class="n">tx</span> <span class="o">=</span> <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">axis</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">factor</span><span class="o">=</span><span class="n">num_thread</span><span class="p">)</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">bx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;blockIdx.x&quot;</span><span class="p">))</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">tx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;threadIdx.x&quot;</span><span class="p">))</span>
<span class="n">f</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;myexp&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">imported_modules</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_source</span><span class="p">())</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>#ifdef _WIN32
  using uint = unsigned int;
  using uchar = unsigned char;
  using ushort = unsigned short;
  using int64_t = long long;
  using uint64_t = unsigned long long;
#else
  #define uint unsigned int
  #define uchar unsigned char
  #define ushort unsigned short
  #define int64_t long long
  #define uint64_t unsigned long long
#endif
extern &quot;C&quot; __global__ void __launch_bounds__(64) myexp_kernel0(float* __restrict__ B, float* __restrict__ A, int n, int stride, int stride1) {
  if (((int)blockIdx.x) &lt; (n &gt;&gt; 6)) {
    B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = __expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
  } else {
    if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) &lt; n) {
      B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = __expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
    }
  }
}
</pre></div>
</div>
</div>
<div class="section" id="unified-intrinsic-call">
<h2>Unified Intrinsic Call<a class="headerlink" href="#unified-intrinsic-call" title="永久链接至标题">¶</a></h2>
<p>The above code verifies that direct external call can be used to
call into device specific functions.
However, the above way only works for CUDA target with float type.
Ideally, we want to write same code for any device and any data type.</p>
<p>TVM intrinsic provides the user a mechanism to achieve this, and this
is the recommended way to solve the problem.
The following code use te.exp instead, which create an intrinsic call
:py:<a class="reference internal" href="../../reference/api/python/te.html#tvm.te.exp" title="tvm.te.exp"><code class="xref py py-func docutils literal notranslate"><span class="pre">tvm.te.exp()</span></code></a> to do the exponential.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">n</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">var</span><span class="p">(</span><span class="s2">&quot;n&quot;</span><span class="p">)</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">placeholder</span><span class="p">((</span><span class="n">n</span><span class="p">,),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">)</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">compute</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">te</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">]),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">)</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">create_schedule</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="p">)</span>
<span class="n">num_thread</span> <span class="o">=</span> <span class="mi">64</span>
<span class="n">bx</span><span class="p">,</span> <span class="n">tx</span> <span class="o">=</span> <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">axis</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">factor</span><span class="o">=</span><span class="n">num_thread</span><span class="p">)</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">bx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;blockIdx.x&quot;</span><span class="p">))</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">tx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;threadIdx.x&quot;</span><span class="p">))</span>
<span class="n">fcuda</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;myexp&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fcuda</span><span class="o">.</span><span class="n">imported_modules</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_source</span><span class="p">())</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>#ifdef _WIN32
  using uint = unsigned int;
  using uchar = unsigned char;
  using ushort = unsigned short;
  using int64_t = long long;
  using uint64_t = unsigned long long;
#else
  #define uint unsigned int
  #define uchar unsigned char
  #define ushort unsigned short
  #define int64_t long long
  #define uint64_t unsigned long long
#endif
extern &quot;C&quot; __global__ void __launch_bounds__(64) myexp_kernel0(float* __restrict__ B, float* __restrict__ A, int n, int stride, int stride1) {
  if (((int)blockIdx.x) &lt; (n &gt;&gt; 6)) {
    B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = __expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
  } else {
    if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) &lt; n) {
      B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = __expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
    }
  }
}
</pre></div>
</div>
<p>We can find that the code works for both CUDA and opencl.
The same te.exp can also be used for float64 data types.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">fopencl</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="s2">&quot;opencl&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;myexp&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fopencl</span><span class="o">.</span><span class="n">imported_modules</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_source</span><span class="p">())</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>// Function: myexp_kernel0
__kernel void myexp_kernel0(__global float* restrict B, __global float* restrict A, int n, int stride, int stride1) {
  if (((int)get_group_id(0)) &lt; (n &gt;&gt; 6)) {
    B[((((((int)get_group_id(0)) * 64) + ((int)get_local_id(0))) * stride1))] = exp(A[((((((int)get_group_id(0)) * 64) + ((int)get_local_id(0))) * stride))]);
  } else {
    if (((((int)get_group_id(0)) * 64) + ((int)get_local_id(0))) &lt; n) {
      B[((((((int)get_group_id(0)) * 64) + ((int)get_local_id(0))) * stride1))] = exp(A[((((((int)get_group_id(0)) * 64) + ((int)get_local_id(0))) * stride))]);
    }
  }
}
</pre></div>
</div>
</div>
<div class="section" id="intrinsic-lowering-rule">
<h2>Intrinsic Lowering Rule<a class="headerlink" href="#intrinsic-lowering-rule" title="永久链接至标题">¶</a></h2>
<p>When <a class="reference internal" href="../../reference/api/python/te.html#tvm.te.exp" title="tvm.te.exp"><code class="xref py py-func docutils literal notranslate"><span class="pre">tvm.te.exp()</span></code></a> is called, TVM creates an intrinsic Call Expr.
TVM uses transformation rules to transform the intrinsic
call to device specific extern calls.</p>
<p>TVM also allows user to customize the rules during runtime.
The following example customizes CUDA lowering rule for <code class="code docutils literal notranslate"><span class="pre">exp</span></code>.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">my_cuda_math_rule</span><span class="p">(</span><span class="n">op</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;Customized CUDA intrinsic lowering rule&quot;&quot;&quot;</span>
    <span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">op</span><span class="p">,</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">Call</span><span class="p">)</span>
    <span class="n">name</span> <span class="o">=</span> <span class="n">op</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">name</span>
    <span class="k">assert</span> <span class="n">name</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;tir.&quot;</span><span class="p">)</span>
    <span class="n">dispatch_name</span> <span class="o">=</span> <span class="n">name</span><span class="p">[</span><span class="mi">4</span><span class="p">:]</span>
    <span class="k">if</span> <span class="n">op</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="s2">&quot;float32&quot;</span><span class="p">:</span>
        <span class="c1"># call float function</span>
        <span class="k">return</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_pure_extern</span><span class="p">(</span><span class="s2">&quot;float32&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="si">%s</span><span class="s2">f&quot;</span> <span class="o">%</span> <span class="n">dispatch_name</span><span class="p">,</span> <span class="n">op</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
    <span class="k">elif</span> <span class="n">op</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="s2">&quot;float64&quot;</span><span class="p">:</span>
        <span class="c1"># call double function</span>
        <span class="k">return</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_pure_extern</span><span class="p">(</span><span class="s2">&quot;float32&quot;</span><span class="p">,</span> <span class="n">dispatch_name</span><span class="p">,</span> <span class="n">op</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="c1"># cannot do translation, return self.</span>
        <span class="k">return</span> <span class="n">op</span>


<span class="n">register_intrin_lowering</span><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">=</span><span class="n">my_cuda_math_rule</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">99</span><span class="p">)</span>
</pre></div>
</div>
<p>Register the rule to TVM with override option to override existing rule.
Notice the difference between the printed code from previous one:
our new rule uses math function <code class="code docutils literal notranslate"><span class="pre">expf</span></code> instead of
fast math version <code class="code docutils literal notranslate"><span class="pre">__expf</span></code>.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">fcuda</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;myexp&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fcuda</span><span class="o">.</span><span class="n">imported_modules</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_source</span><span class="p">())</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>#ifdef _WIN32
  using uint = unsigned int;
  using uchar = unsigned char;
  using ushort = unsigned short;
  using int64_t = long long;
  using uint64_t = unsigned long long;
#else
  #define uint unsigned int
  #define uchar unsigned char
  #define ushort unsigned short
  #define int64_t long long
  #define uint64_t unsigned long long
#endif
extern &quot;C&quot; __global__ void __launch_bounds__(64) myexp_kernel0(float* __restrict__ B, float* __restrict__ A, int n, int stride, int stride1) {
  if (((int)blockIdx.x) &lt; (n &gt;&gt; 6)) {
    B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
  } else {
    if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) &lt; n) {
      B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = expf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
    }
  }
}
</pre></div>
</div>
</div>
<div class="section" id="add-your-own-intrinsic">
<h2>Add Your Own Intrinsic<a class="headerlink" href="#add-your-own-intrinsic" title="永久链接至标题">¶</a></h2>
<p>If there is an intrinsic that is not provided by TVM.
User can easily add new intrinsic by using the intrinsic rule system.
The following example add an intrinsic <code class="code docutils literal notranslate"><span class="pre">mylog</span></code> to the system.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">mylog</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;customized log intrinsic function&quot;&quot;&quot;</span>
    <span class="k">return</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_intrin</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="s2">&quot;tir.mylog&quot;</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>


<span class="k">def</span> <span class="nf">my_cuda_mylog_rule</span><span class="p">(</span><span class="n">op</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;CUDA lowering rule for log&quot;&quot;&quot;</span>
    <span class="k">if</span> <span class="n">op</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="s2">&quot;float32&quot;</span><span class="p">:</span>
        <span class="k">return</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_pure_extern</span><span class="p">(</span><span class="s2">&quot;float32&quot;</span><span class="p">,</span> <span class="s2">&quot;logf&quot;</span><span class="p">,</span> <span class="n">op</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
    <span class="k">elif</span> <span class="n">op</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="s2">&quot;float64&quot;</span><span class="p">:</span>
        <span class="k">return</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">call_pure_extern</span><span class="p">(</span><span class="s2">&quot;float64&quot;</span><span class="p">,</span> <span class="s2">&quot;log&quot;</span><span class="p">,</span> <span class="n">op</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="k">return</span> <span class="n">op</span>


<span class="c1"># new op registration is triggered by registering an attribute of the op</span>
<span class="n">register_op_attr</span><span class="p">(</span><span class="s2">&quot;tir.mylog&quot;</span><span class="p">,</span> <span class="s2">&quot;TCallEffectKind&quot;</span><span class="p">,</span> <span class="n">tvm</span><span class="o">.</span><span class="n">tir</span><span class="o">.</span><span class="n">CallEffectKind</span><span class="o">.</span><span class="n">Pure</span><span class="p">)</span>
<span class="n">register_intrin_lowering</span><span class="p">(</span><span class="s2">&quot;tir.mylog&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">=</span><span class="n">my_cuda_mylog_rule</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">99</span><span class="p">)</span>

<span class="n">n</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">var</span><span class="p">(</span><span class="s2">&quot;n&quot;</span><span class="p">)</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">placeholder</span><span class="p">((</span><span class="n">n</span><span class="p">,),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">)</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">compute</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">mylog</span><span class="p">(</span><span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">]),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">)</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">create_schedule</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="p">)</span>
<span class="n">num_thread</span> <span class="o">=</span> <span class="mi">64</span>
<span class="n">bx</span><span class="p">,</span> <span class="n">tx</span> <span class="o">=</span> <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">axis</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">factor</span><span class="o">=</span><span class="n">num_thread</span><span class="p">)</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">bx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;blockIdx.x&quot;</span><span class="p">))</span>
<span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">tx</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;threadIdx.x&quot;</span><span class="p">))</span>
<span class="n">fcuda</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;mylog&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">fcuda</span><span class="o">.</span><span class="n">imported_modules</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get_source</span><span class="p">())</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>#ifdef _WIN32
  using uint = unsigned int;
  using uchar = unsigned char;
  using ushort = unsigned short;
  using int64_t = long long;
  using uint64_t = unsigned long long;
#else
  #define uint unsigned int
  #define uchar unsigned char
  #define ushort unsigned short
  #define int64_t long long
  #define uint64_t unsigned long long
#endif
extern &quot;C&quot; __global__ void __launch_bounds__(64) mylog_kernel0(float* __restrict__ B, float* __restrict__ A, int n, int stride, int stride1) {
  if (((int)blockIdx.x) &lt; (n &gt;&gt; 6)) {
    B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = logf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
  } else {
    if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) &lt; n) {
      B[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride1))] = logf(A[((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) * stride))]);
    }
  }
}
</pre></div>
</div>
</div>
<div class="section" id="summary">
<h2>总结<a class="headerlink" href="#summary" title="永久链接至标题">¶</a></h2>
<ul class="simple">
<li><p>TVM can call extern target dependent math function.</p></li>
<li><p>Use intrinsic to defined a unified interface for the functions.</p></li>
<li><p>For more intrinsics available in tvm, take a look at <a class="reference internal" href="../../reference/api/python/tir.html#module-tvm.tir" title="tvm.tir"><code class="xref any py py-mod docutils literal notranslate"><span class="pre">tvm.tir</span></code></a></p></li>
<li><p>You can customize the intrinsic behavior by defining your own rules.</p></li>
</ul>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-schedules-intrin-math-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/d9089082842c138d4c81335f88c60c82/intrin_math.py"><code class="xref download docutils literal notranslate"><span class="pre">下载</span> <span class="pre">Python</span> <span class="pre">源代码:</span> <span class="pre">intrin_math.py</span></code></a></p>
</div>
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/1e482ba1190961191e3a0bdbd0585faa/intrin_math.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">下载</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">intrin_math.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>
</div>


           </div>
           
          </div>
          

<footer>

    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="scan.html" class="btn btn-neutral float-right" title="Scan and Recurrent Kernel" accesskey="n" rel="next">下一个 <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="reduction.html" class="btn btn-neutral float-left" title="Reduction" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> 上一个</a>
      
    </div>

<div id="button" class="backtop"><img src="../../_static//img/right.svg" alt="backtop"/> </div>
<section class="footerSec">
    <div class="footerHeader">
      <ul class="d-flex align-md-items-center justify-content-between flex-column flex-md-row">
        <li class="copywrite d-flex align-items-center">
          <h5 id="copy-right-info">© 2020 Apache Software Foundation | All right reserved</h5>
        </li>
      </ul>

    </div>

    <ul>
      <li class="footernote">Copyright © 2020 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.</li>
    </ul>

</section>
</footer>
        </div>
      </div>

    </section>

  </div>
  

    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>

  </body>
  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
    <!-- Theme Analytics -->
    <script>
    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
    })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

    ga('create', 'UA-75982049-2', 'auto');
    ga('send', 'pageview');
    </script>

    
   

</body>
</html>